1 Analysis of RNA-seq data using recovery rates

The goal of the current analysis is to identify:

  1. FA-amplified PTS/PNS targets
  2. FA-affected PTS/PNS targets
  3. Unique PTS/PNS targets
rnaseq_reads <- fread("~/GitHub/RNAseq_amelie/data/salmon_quant/Salmon_EstCount_ENSG.tsv")

ensembl <- useEnsembl(biomart="genes", dataset="hsapiens_gene_ensembl", mirror="asia")
  
bm <- getBM( attributes = c("external_gene_name","ensembl_gene_id"),
             filters    = "ensembl_gene_id",
             values     = rnaseq_reads$ENSG,
             mart       = ensembl)
bm <- as.data.table(bm)

rnaseq_reads <- merge.data.table(x = bm, y = rnaseq_reads, 
                                 by.x = "ensembl_gene_id", by.y = "ENSG", 
                                 all.y = TRUE)
rnaseq_reads[, ensembl_gene_id := NULL]

setnames(rnaseq_reads, "external_gene_name", "ENSG")

rnaseq_reads

1.1 Alternative proposal for log2(FC): Weighted Recovery Ratio

\(\text{Weighted Recovery Ratio} = \left(1 - \frac{\text{Treatment} - \text{Ctrl}}{Stress - \text{Ctrl}}\right) \cdot \left|Stress - \text{Ctrl\right|\)

Rationale

To quantify the extent to which a treatment (PTS or PNS) restores gene expression altered by fatty acid (FA) stress back to the healthy baseline (Ctrl). This metric accounts for both the proportional recovery (directional reversion toward Ctrl) and the biological significance of the fatty acid stress-induced changes (magnitude of the FA-Ctrl shift).

Components

Proportional Recovery (Term between parenthesis): This term captures the relative shift of gene expression under treatment compared to the shift caused by H₂O₂. A value closer to 1 indicates near-complete recovery to Ctrl, while values < 0 indicate expression moving further away from Ctrl.

Biological Magnitude Weighting | Absolute term |: This term weights the proportional recovery by the magnitude of the expression change induced by H₂O₂. This means that genes with larger H₂O₂-induced shifts are given more importance.

Ranges and Interpretation

If you consider x as the Weighted Recovery Ratio of any given gene:

x < 0 : The treatment pushes expression levels further away from Ctrl in the same direction as the stress effect. Meaning the treatment exacerbated the stress-induced dysregulation. This is not necessarily bad, since a “dysregulation” mechanism could help restore homeostasis.

x ∼ 0: Minimal recovery or no effect of the treatment.

0 < x < 1: Partial recovery, the treatment was moderately effective in reversing oxidative stress-induced dysregulation.

x ≥ 1: Complete or “over-recovery”, the treatment effectively restores expression to healthy levels, potentially also overshooting healthy baseline.

1.2 Calculate Recovery Rates

We first calculate average VST values for all genes within each experimental condition.

rnaseq_vst <- data.table(ENSG = rnaseq_reads$ENSG,
                         DESeq2:::vst(as.matrix(ceiling(rnaseq_reads[, 
                                                                     .SD, 
                                                                     .SDcols = !"ENSG"]))))
## converting counts to integer mode
rnaseq_vst <- melt(rnaseq_vst, id.vars = "ENSG", variable.name = "Condition", value.name = "VST")
rnaseq_vst[, Condition := gsub("[1-3]", "", Condition)]
rnaseq_vst[, VST := mean(VST), by = c("ENSG", "Condition")]
rnaseq_vst <- dcast(unique(rnaseq_vst), formula = ENSG ~ Condition, value.var = "VST")
rnaseq_vst

Next, we calculate the recovery ratios for each experimental condition

rnaseq_rank <- rnaseq_vst

rnaseq_rank[, AbsoluteEffect := FA - CTRL]
rnaseq_rank[, PTS_Recovery := round((1 - ((PTS - CTRL)/(AbsoluteEffect))) * abs(AbsoluteEffect), 5)]
rnaseq_rank[, PNS_Recovery := round((1 - ((PNS - CTRL)/(AbsoluteEffect))) * abs(AbsoluteEffect), 5)]

rnaseq_rank[, PTS_Recovery := ifelse(AbsoluteEffect == 0, 0, PTS_Recovery)]
rnaseq_rank[, PNS_Recovery := ifelse(AbsoluteEffect == 0, 0, PNS_Recovery)]

rnaseq_rank <- rnaseq_rank[, .SD, .SDcols = c("ENSG", "PTS_Recovery", "PNS_Recovery")]
setorder(rnaseq_rank, PTS_Recovery)
rnaseq_rank

1.3 Visualize Recovery rates

apply(rnaseq_rank[, .SD, .SDcols = -"ENSG"], MARGIN = 2, summary)
##         PTS_Recovery PNS_Recovery
## Min.    -0.924840000  -1.10422000
## 1st Qu. -0.016705000   0.00000000
## Median   0.000000000   0.00000000
## Mean     0.007973473   0.03113815
## 3rd Qu.  0.045287500   0.07102250
## Max.     2.493930000   2.41983000
recovery_dt <- melt(rnaseq_rank, id.vars = "ENSG", variable.name = "Condition", value.name = "WRR")
ggplot(recovery_dt, aes(x = WRR, fill = Condition)) + 
  geom_histogram(binwidth = 0.2) +
  geom_vline(xintercept = 0, colour = "black", linetype = "dashed") +
  scale_y_continuous(trans = "log1p", labels = comma, breaks = c(1, 10, 1000, 5000, 20000, 30000))+
  facet_wrap(~Condition)

The histograms represent the recovery rates calculated on VST counts. Most genes showed a moderate to large recovery centered at 0. Based on the summary statistics and histograms, seems like PNS had the highest recovery while GEN had the strongest effect overall.

1.4 Classification of genes based on WRR

rnaseq_rank[, PTS_Classification := ifelse(PTS_Recovery < -0.5, "EnhancedResponse", "NoResponse")]
rnaseq_rank[, PTS_Classification := ifelse(PTS_Recovery > 0.5, "RecoveryResponse", PTS_Classification)]

rnaseq_rank[, PNS_Classification := ifelse(PNS_Recovery < -0.5, "EnhancedResponse", "NoResponse")]
rnaseq_rank[, PNS_Classification := ifelse(PNS_Recovery > 0.5, "RecoveryResponse", PNS_Classification)]

setcolorder(rnaseq_rank, c("ENSG", "PTS_Recovery", "PTS_Classification"))
rnaseq_rank
dt_plot <- rnaseq_rank[, .(ENSG, PTS_Classification, PNS_Classification)]
dt_plot <- melt(dt_plot, id.vars = "ENSG")
dt_plot[, variable := stringr:::str_remove(variable, "_Classification")]

ggplot(dt_plot, aes(x = value, fill = variable)) +
  geom_bar(position=position_dodge()) + 
  facet_wrap( ~ variable) +
  scale_y_continuous(trans = "log10") +
  coord_flip() +
  labs(x = "", y = "Number of genes (log10)",
       fill = "Treatment")

upset_dt <- dt_plot[, .(ENSG, Class = paste(variable, value, sep = "_"))]
upset_dt <- dcast(upset_dt, ENSG ~ Class, fun.aggregate = length)

upset_mat <- as.matrix(upset_dt[, !"ENSG"])
rownames(upset_mat) <- upset_dt$ENSG

upset_mat <- make_comb_mat(upset_mat)

upset_mat
## A combination matrix with 6 sets and 7 combinations.
##   ranges of combination set size: c(19, 28960).
##   mode for the combination size: distinct.
##   sets are on rows.
## 
## Combination sets are:
##   PNS_EnhancedResponse PNS_NoResponse PNS_RecoveryResponse PTS_EnhancedResponse PTS_NoResponse PTS_RecoveryResponse   code  size
##                      x                                                        x                                     100100    19
##                      x                                                                       x                      100010    19
##                                     x                                         x                                     010100    29
##                                     x                                                        x                      010010 28960
##                                     x                                                                             x 010001    33
##                                                          x                                   x                      001010   127
##                                                          x                                                        x 001001    57
## 
## Sets are:
##                    set  size
##   PNS_EnhancedResponse    38
##         PNS_NoResponse 29022
##   PNS_RecoveryResponse   184
##   PTS_EnhancedResponse    48
##         PTS_NoResponse 29106
##   PTS_RecoveryResponse    90
UpSet(upset_mat)

# PNS Recovery & PTS NoResponse
extract_comb(upset_mat, "001010")
##   [1] "ADM2"           "AKT3"           "ALDH1A3"        "ALDH1L2"       
##   [5] "ALDH3A2"        "ANKRD1"         "ASPH"           "ATAT1"         
##   [9] "BLOC1S5-TXNDC5" "BOLA2B"         "C1QTNF6"        "C5orf34"       
##  [13] "CASTOR2"        "CD24"           "CDC42EP1"       "CDC42EP2"      
##  [17] "CDKN1C"         "CHAC1"          "CLN8"           "CNTNAP3C"      
##  [21] "CORO7"          "CSF3"           "CSGALNACT1"     "CYP1A1"        
##  [25] "CYP4F11"        "DAXX"           "EPB41L4A"       "EPHX1"         
##  [29] "ERRFI1"         "ETNK2"          "F3"             "FADS1"         
##  [33] "FAM220A"        "FAM83H"         "FAP"            "FLOT1"         
##  [37] "FOXO1"          "FOXQ1"          "FTH1"           "FTL"           
##  [41] "GFUS"           "GOLGA8B"        "HES1"           "HMOX1"         
##  [45] "HRK"            "INTS3"          "ITGB2"          "ITGB4"         
##  [49] "KNTC1"          "LTBP1"          "LTBP4"          "MAPK4"         
##  [53] "MARF1"          "MARVELD1"       "MCAM"           "MEGF6"         
##  [57] "MELTF"          "MLPH"           "MME"            "MT-ATP6"       
##  [61] "MT-ATP8"        "MT-CYB"         "MT-ND1"         "MT-ND4"        
##  [65] "MT-ND4L"        "MT-ND5"         "MT-ND6"         "MT1X"          
##  [69] "MTATP6P1"       "MYC"            "MYZAP"          "NAV3"          
##  [73] "NDRG1"          "NIBAN1"         "NLRP1"          "NOXA1"         
##  [77] "NPIPA1"         "NPIPA7"         "OPLAH"          "OSGIN1"        
##  [81] "PAN2"           "PCDH1"          "PCLO"           "PEDS1-UBE2V1"  
##  [85] "PIR"            "PLCH2"          "PPIF"           "PPIP5K1P1"     
##  [89] "PPP1R11"        "PPP2R3B"        "PRDX1"          "PRKACA"        
##  [93] "PRR5L"          "PRSS23"         "PSMB3"          "PSMC1P1"       
##  [97] "RAB7B"          "RAC1P2"         "RGS2"           "RNF39"         
## [101] "RPL23AP47"      "RPS9"           "SCNN1A"         "SCNN1G"        
## [105] "SERPINB2"       "SH3TC1"         "SIK1"           "SLC1A4"        
## [109] "SLC6A9"         "SQSTM1"         "SRGAP3"         "STARD10"       
## [113] "SYNRG"          "TGFB2"          "TGM2"           "THBS1"         
## [117] "TIMP3"          "TIPARP"         "TKT"            "TNS4"          
## [121] "TRIM39"         "TRMT6"          "TXNRD1"         "UNC5B"         
## [125] "VGLL3"          "VSTM2L"         "ZBED6"
# PNS Enhanced & PTS NoResponse
extract_comb(upset_mat, "100010")
##  [1] "AKAP12"        "CHTF18"        "DBP"           "DCAF11"       
##  [5] "EME1"          "F2R"           "FBXO32"        "GBE1"         
##  [9] "GPCPD1"        "IFNAR2-IL10RB" "IFNGR2"        "KIF21B"       
## [13] "KLF10"         "MAP1B"         "MT-ND2"        "RGPD5"        
## [17] "RUFY1"         "TMEM265"       "VPS52"
# PTS Recovery & PNS NoResponse
extract_comb(upset_mat, "010001")
##  [1] "ABCC2"          "ANGPTL4"        "AOX1"           "ARMCX5-GPRASP2"
##  [5] "BHLHE40"        "CCNA1"          "DCAF15"         "DDIT4"         
##  [9] "DST"            "IMPA2"          "ITGB8"          "KANSL1"        
## [13] "KPNA2"          "LAMA3"          "NDUFA6"         "NOTCH3"        
## [17] "NSF"            "NT5E"           "PDK4"           "PFDN6"         
## [21] "PKN1"           "POLR1H"         "PPP1R10"        "RPL13AP7"      
## [25] "RPP21"          "RXRB"           "SYNJ2BP-COX16"  "TBC1D3C"       
## [29] "TRIM56"         "TRIM61"         "TSC22D1"        "UBALD2"        
## [33] "ZBTB22"
# PTS Enhanced & PNS NoResponse
extract_comb(upset_mat, "010100")
##  [1] "ARL2-SNX15" "ATF6B"      "C1S"        "CDCA2"      "CDK1"      
##  [6] "CELSR2"     "CFB"        "CHI3L1"     "CXCL2"      "DDIT3"     
## [11] "DLL1"       "DTL"        "FAM156B"    "GBP2"       "GDF15"     
## [16] "GFPT2"      "H2AC19"     "HERPUD1"    "MFSD2A"     "PIGW"      
## [21] "PKD1P3"     "S100A14"    "SAA2"       "SERPINB4"   "SYNE2"     
## [26] "TMT1A"      "TP63"       "TPX2"       "ZC3H12A"

1.5 Overlap with DEGs

degs <- fread("~/GitHub/RNAseq_amelie/output/DEG_results.tsv")
degs <- degs[, .(external_gene_name, log2FoldChange, padj, Group)]
degs <- unique(degs[external_gene_name != ""])

# Add Categories of interest
degs[, WRR_category := "None"]
# PNS Recovery & PTS NoResponse
degs[external_gene_name %in% extract_comb(upset_mat, "001010"), 
     WRR_category := "PNS_Recovery"]
# PTS Recovery & PTS NoResponse
degs[external_gene_name %in% extract_comb(upset_mat, "001010"), 
     WRR_category := "PTS_Recovery"]
# PNS Enhanced & PTS NoResponse
degs[external_gene_name %in% extract_comb(upset_mat, "100010"), 
     WRR_category := "PNS_Enhanced"]
# PTS Enhanced & PTS NoResponse
degs[external_gene_name %in% extract_comb(upset_mat, "010100"), 
     WRR_category := "PTS_Enhanced"]

degs
temp <- degs[padj < alpha & WRR_category != "None" & Group == "PNS_vs_FA", 
             .(external_gene_name, log2FoldChange, WRR_category)]
setorder(temp, "log2FoldChange")
temp
temp <- degs[padj < alpha & WRR_category != "None" & Group == "PTS_vs_FA", 
             .(external_gene_name, log2FoldChange, WRR_category)]
setorder(temp, "log2FoldChange")
temp